#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Author: IcySun
# Script purpose: fetch usable HTTP proxies
import urllib2
import socket
from bs4 import BeautifulSoup

def IsOpen(ip, port, timeout=5):
    """Return True if a TCP connection to ``ip:port`` can be established.

    Args:
        ip: Host name or IPv4 address to probe.
        port: TCP port, given as an int or a numeric string.
        timeout: Seconds to wait for the connection before giving up.
            ``None`` blocks until the operating system gives up.

    Returns:
        True when the connect succeeds; False when it is refused, times
        out, the host cannot be resolved, or the port is malformed or out
        of range.
    """
    try:
        port = int(port)
    except (TypeError, ValueError):
        return False

    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # Without a timeout a filtered port can block for minutes.
        s.settimeout(timeout)
        try:
            s.connect((ip, port))
        except (socket.error, OverflowError, TypeError, ValueError):
            # socket.error covers refusal, timeout and resolution failure;
            # the others cover out-of-range ports and unusable host values.
            # KeyboardInterrupt is deliberately NOT caught so Ctrl-C still
            # stops the caller instead of reading as "port closed".
            return False
        return True
    finally:
        # Always release the descriptor; close() also tears down the
        # connection, so no separate shutdown() is needed.
        s.close()

# Scrape the proxy list page and record every HTTP/HTTPS proxy whose port
# accepts a TCP connection. Reachable proxies are appended to ava.txt as
# "ip:port", one per line.
header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:36.0) Gecko/20100101 Firefox/36.0'}
url = 'http://www.xicidaili.com/nn/'
hurl = urllib2.Request(url, headers=header)

# Bound the fetch so a stalled server cannot hang the script, and always
# release the HTTP connection once the body has been read.
response = urllib2.urlopen(hurl, timeout=30)
try:
    html_doc = response.read()
finally:
    response.close()

# Name the parser explicitly: otherwise bs4 picks whichever parser happens to
# be installed (and warns), so results can differ between machines.
soup = BeautifulSoup(html_doc, 'html.parser')
table = soup.find('table', id='ip_list')
if table is None:
    # The page layout changed or the request was blocked; fail with a clear
    # message instead of an AttributeError on None.
    raise SystemExit("proxy table 'ip_list' not found in " + url)

trs = table.find_all('tr')
for tr in trs[1:]:  # the first row is skipped (presumably the header row)
    tds = tr.find_all('td')
    if len(tds) < 7:
        # Malformed or spacer row: skip it rather than abort the whole run.
        continue
    ip = tds[2].text.strip()
    port = tds[3].text.strip()
    protocol = tds[6].text.strip()
    if protocol not in ('HTTP', 'HTTPS'):
        continue
    print(ip)
    if IsOpen(ip, port):
        print(ip + ":" + port + " is OK")
        # Append per hit so results survive an interrupted run.
        with open("ava.txt", "a") as ava:
            ava.write(ip + ":" + port + "\n")
